from pathlib import Path

import folium
import numpy as np
import pandas as pd
from folium.plugins import HeatMap
pd.set_option('display.max_columns', None)
accident_df = pd.read_parquet('./data/accident_data.parquet')
accident_df.head()
| Accident_Index | 1st_Road_Class | 1st_Road_Number | 2nd_Road_Class | 2nd_Road_Number | Accident_Severity | Carriageway_Hazards | Date | Day_of_Week | Did_Police_Officer_Attend_Scene_of_Accident | Junction_Control | Junction_Detail | Latitude | Light_Conditions | Local_Authority_(District) | Local_Authority_(Highway) | Location_Easting_OSGR | Location_Northing_OSGR | Longitude | LSOA_of_Accident_Location | Number_of_Casualties | Number_of_Vehicles | Pedestrian_Crossing-Human_Control | Pedestrian_Crossing-Physical_Facilities | Police_Force | Road_Surface_Conditions | Road_Type | Special_Conditions_at_Site | Speed_limit | Time | Urban_or_Rural_Area | Weather_Conditions | Year | InScotland | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 200501BS00001 | A | 3218.0 | None | 0.0 | Serious | None | 04/01/2005 | Tuesday | 1.0 | Data missing or out of range | Not at junction or within 20 metres | 51.489096 | None | Kensington and Chelsea | Kensington and Chelsea | 525680.0 | 178240.0 | -0.191170 | E01002849 | 1 | 1 | 0.0 | 1.0 | Metropolitan Police | Wet or damp | Single carriageway | None | 30 | 17:42 | Urban | Raining no high winds | 2005 | No |
| 1 | 200501BS00002 | B | 450.0 | C | 0.0 | Slight | None | 05/01/2005 | Wednesday | 1.0 | Auto traffic signal | Crossroads | 51.520075 | Darkness - lights lit | Kensington and Chelsea | Kensington and Chelsea | 524170.0 | 181650.0 | -0.211708 | E01002909 | 1 | 1 | 0.0 | 5.0 | Metropolitan Police | Dry | Dual carriageway | None | 30 | 17:36 | Urban | Fine no high winds | 2005 | No |
| 2 | 200501BS00003 | C | 0.0 | None | 0.0 | Slight | None | 06/01/2005 | Thursday | 1.0 | Data missing or out of range | Not at junction or within 20 metres | 51.525301 | Darkness - lights lit | Kensington and Chelsea | Kensington and Chelsea | 524520.0 | 182240.0 | -0.206458 | E01002857 | 1 | 2 | 0.0 | 0.0 | Metropolitan Police | Dry | Single carriageway | None | 30 | 00:15 | Urban | Fine no high winds | 2005 | No |
| 3 | 200501BS00004 | A | 3220.0 | None | 0.0 | Slight | None | 07/01/2005 | Friday | 1.0 | Data missing or out of range | Not at junction or within 20 metres | 51.482442 | None | Kensington and Chelsea | Kensington and Chelsea | 526900.0 | 177530.0 | -0.173862 | E01002840 | 1 | 1 | 0.0 | 0.0 | Metropolitan Police | Dry | Single carriageway | None | 30 | 10:35 | Urban | Fine no high winds | 2005 | No |
| 4 | 200501BS00005 | None | 0.0 | None | 0.0 | Slight | None | 10/01/2005 | Monday | 1.0 | Data missing or out of range | Not at junction or within 20 metres | 51.495752 | Darkness - lighting unknown | Kensington and Chelsea | Kensington and Chelsea | 528060.0 | 179040.0 | -0.156618 | E01002863 | 1 | 1 | 0.0 | 0.0 | Metropolitan Police | Wet or damp | Single carriageway | None | 30 | 21:13 | Urban | Fine no high winds | 2005 | No |
accident_df.shape
(1048575, 34)
accident_df.isna().sum()
Accident_Index 0 1st_Road_Class 305589 1st_Road_Number 2 2nd_Road_Class 439824 2nd_Road_Number 10803 Accident_Severity 0 Carriageway_Hazards 29 Date 0 Day_of_Week 0 Did_Police_Officer_Attend_Scene_of_Accident 269 Junction_Control 0 Junction_Detail 0 Latitude 111 Light_Conditions 2084 Local_Authority_(District) 0 Local_Authority_(Highway) 0 Location_Easting_OSGR 111 Location_Northing_OSGR 111 Longitude 112 LSOA_of_Accident_Location 71890 Number_of_Casualties 0 Number_of_Vehicles 0 Pedestrian_Crossing-Human_Control 21 Pedestrian_Crossing-Physical_Facilities 37 Police_Force 0 Road_Surface_Conditions 1189 Road_Type 7266 Special_Conditions_at_Site 16 Speed_limit 0 Time 100 Urban_or_Rural_Area 85 Weather_Conditions 21392 Year 0 InScotland 43 dtype: int64
Check the 'Accident_Severity' classes
accident_df['Accident_Severity'].unique()
array(['Serious', 'Slight', 'Fatal'], dtype=object)
Use only the rows that do NOT contain slight accidents
selected_accidents_df = accident_df[accident_df['Accident_Severity'] != 'Slight'].copy()
selected_accidents_df.shape
(152692, 34)
Now we have 895883 fewer rows
Now we select only the columns that will be useful to create the heat map
selected_accidents_df = selected_accidents_df[['Accident_Severity', 'Latitude', 'Longitude']]
Drop NA values
# Keep only the rows where every remaining column has a value
selected_accidents_df = selected_accidents_df.dropna()
selected_accidents_df.shape
(152674, 3)
Now we have 18 fewer rows
# Renumber the rows from 0 and discard the old index labels
selected_accidents_df = selected_accidents_df.reset_index(drop=True)
selected_accidents_df.head()
| Accident_Severity | Latitude | Longitude | |
|---|---|---|---|
| 0 | Serious | 51.489096 | -0.191170 |
| 1 | Serious | 51.500191 | -0.205139 |
| 2 | Serious | 51.495498 | -0.174925 |
| 3 | Serious | 51.517796 | -0.215545 |
| 4 | Serious | 51.521709 | -0.212653 |
def extract_coords_list(df, severity=None, severity_col='Accident_Severity', lat_col='Latitude', lng_col='Longitude'):
    """Return the coordinates stored in ``df`` as a list of ``[lat, lng]`` pairs.

    Args:
        df: DataFrame containing ``lat_col`` and ``lng_col`` (and
            ``severity_col`` when ``severity`` is given).
        severity: Value of ``severity_col`` to keep. ``None`` disables the
            filter, so every row of ``df`` is returned.
        severity_col: Name of the column compared against ``severity``.
        lat_col: Name of the latitude column.
        lng_col: Name of the longitude column.

    Returns:
        A list with one ``[lat, lng]`` entry per selected row, in row order.
        The list is empty when no row matches.
    """
    selected = df
    if severity is not None:
        # Test against None explicitly: a falsy label such as 0 or '' is a
        # legitimate severity value and must still filter the rows.
        selected = selected[selected[severity_col] == severity]
    return selected[[lat_col, lng_col]].to_numpy().tolist()
Major accidents (serious + fatal)
major_accident_coords = extract_coords_list(selected_accidents_df, None)
Only serious accidents
serious_accident_coords = extract_coords_list(selected_accidents_df, 'Serious')
Only fatal accidents
fatal_accident_coords = extract_coords_list(selected_accidents_df, 'Fatal')
Create the map centered on the island of Great Britain
gb_coords = [55.1885762167905, -5.1654628901018835] # [latitude, longitude] used as the initial map centre for Great Britain
# NOTE(review): zoom_start is fractional here; Leaflet presumably snaps the
# zoom to a whole level unless zoomSnap is lowered — confirm the rendered zoom.
heat_map = folium.Map(location=gb_coords, zoom_start=5.2)
Add the heat layers
def add_heatmap_layer(coord_set, control_name, is_enabled=False, m=None):
    """Add a named heat-map layer built from ``coord_set`` to a folium map.

    Args:
        coord_set: Points passed straight to ``HeatMap`` as its data.
        control_name: Layer name passed to ``HeatMap`` as ``name``.
        is_enabled: Passed to ``HeatMap`` as ``show``; ``False`` by default.
        m: Map that receives the layer. ``None`` (the default) means the
            module-level ``heat_map`` as it exists when the function is called.
    """
    if m is None:
        # Look the global up at call time. A default of ``m=heat_map`` is
        # evaluated once, when the function is defined, so re-creating
        # ``heat_map`` afterwards would leave layers going to the old map.
        m = heat_map
    HeatMap(coord_set, name=control_name, show=is_enabled).add_to(m)
# One entry per layer: (coordinates, layer name, enabled by default).
# Only the combined "major accidents" layer is enabled by default.
layer_specs = [
    (major_accident_coords, 'Major accidents (serious + fatal)', True),
    (serious_accident_coords, 'Serious accidents', False),
    (fatal_accident_coords, 'Fatal accidents', False),
]
for coords, label, shown in layer_specs:
    add_heatmap_layer(coords, label, shown)
Add the layer control to the map
folium.LayerControl(collapsed=False).add_to(heat_map)
<folium.map.LayerControl at 0x7f8471c17970>
Map output
# Create the output directory first: saving into a missing directory raises
# FileNotFoundError.
Path('./map').mkdir(parents=True, exist_ok=True)
heat_map.save('./map/heat_map.html')
heat_map